Load data in R:

Import metadata:

# ps = "~/Documents/GitHub/amchick/data/raw/rgi_CARD/sqm_rgi_chicken.txt"
#   
# ps %>% 
#   # here::here() %>%
#   read_tsv() -> df
# 
# df %>% 
#   # head() %>% 
#   DT::datatable()
# ```

# ```{r}
# ps = "~/Documents/GitHub/amchick/data/raw/rgi_CARD/sqm_rgi_chicken.txt"
#   
# ps %>% 
#   # here::here() %>%
#   read_tsv() -> df
# 
# df %>% 
#   head() %>% 
#   DT::datatable()
# ```


ps = "~/Documents/GitHub/amchick/data/raw/metabarcoding/merged_chicken_human_04.08.2021.tsv"

ps %>% 
  # here::here() %>%
  read_tsv() %>% 
  filter(metagenomic_sample_name %!in% NA) -> meta
## Rows: 600 Columns: 61
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (19): sample, Sample_description, I7_Index_ID, index, I5_Index_ID, index...
## dbl (42): input, filtered, denoisedF, denoisedR, merged, tabled, filtered_pc...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
meta %>% 
  head() %>% 
  DT::datatable()
meta 
## # A tibble: 43 × 61
##    sample      input filtered denoisedF denoisedR merged tabled filtered_pc
##    <chr>       <dbl>    <dbl>     <dbl>     <dbl>  <dbl>  <dbl>       <dbl>
##  1 CR-15-S292  23558    23449     23410     23355  22970  22970       0.995
##  2 CR-17-S97   35320    35021     34992     34870  34419  34419       0.992
##  3 CR-19-S341  17616    17528     17501     17477  17246  17246       0.995
##  4 CR-21-S126   5791     5764      5753      5734   5585   5585       0.995
##  5 CR-46-S191  24284    24178     24128     24062  23531  23531       0.996
##  6 CR-64-S324  10847    10801     10784     10726  10465  10465       0.996
##  7 D-0-S154    11484    11428     11279     11256  10248  10248       0.995
##  8 TR1-15-S168   153      151       144       115     79     79       0.987
##  9 TR1-17-S246 16895    16833     16807     16749  16429  16429       0.996
## 10 TR1-19-S305  9274     9256      9239      9203   8959   8959       0.998
## # … with 33 more rows, and 53 more variables: denoisedF_pc <dbl>,
## #   denoisedR_pc <dbl>, merged_pc <dbl>, filtered_merged_pc <dbl>,
## #   input_merged_pc <dbl>, tabled_joined <dbl>, chimera_out <dbl>,
## #   length_filtered <dbl>, tabled_pc <dbl>, chimera_out_pc <dbl>,
## #   length_filtered_pc <dbl>, Sample_description <chr>, I7_Index_ID <chr>,
## #   index <chr>, I5_Index_ID <chr>, index2 <chr>, Description2 <chr>,
## #   Experiment <chr>, Reactor <chr>, Treatment <chr>, …

Import resistome data:

type = c("protein homolog model")

# df %>% 
#   filter(Model_type %in% type,
#          Cut_Off == "Strict",
#          Nudged %!in% c("TRUE"),
#          Best_Identities > 95, # plot disteibution of bestID vs percentage length ref and color bitscore and symbol class AMR
#          Percentage.Length.of.Reference.Sequence > 80) %>% 
#   select(ORF_ID,
#          Best_Hit_ARO,
#          Model_type,
#          Drug.Class,
#          Resistance.Mechanism,
#          AMR.Gene.Family,
#          Best_Identities,
#          Percentage.Length.of.Reference.Sequence,
#          Note,
#          "Length AA",
#          "Gene name",
#          "Contig ID", 
#          starts_with("TPM"),
#          ) -> fil_df
# 
# fil_df %>% 
#   head() %>% 
#   DT::datatable()

# fil_df %>% 
#   write_tsv(here::here("~/Projects/ETH/Alessia/mobilome/rgi_card_table.tsv"))

"~/Documents/GitHub/amchick/tmp/rgi_card_table.tsv" %>% 
  read_tsv() %>% 
  separate(ORF_ID, into = c("cont", "cont_num", "orf"), sep = "_", remove = FALSE) %>% 
  mutate(cont_id = paste0(cont,"_", cont_num)) %>% 
  select(cont_id, everything()) -> fil_df_fil
## Rows: 108 Columns: 55
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr  (8): ORF_ID, Best_Hit_ARO, Model_type, Drug.Class, Resistance.Mechanism...
## dbl (46): Best_Identities, Percentage.Length.of.Reference.Sequence, Length A...
## lgl  (1): Note
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
fil_df_fil %>% 
  head() %>% 
  DT::datatable()

Import contig mapping:

"/Users/fconstan/Projects/ETH/Alessia/mobilome/chicken/map_megahit_SQM.csv" %>% 
  read_csv()  -> mapping

mapping %>% 
  head() %>% 
  DT::datatable()

Import isescan results:

"/Users/fconstan/Projects/ETH/Alessia/mobilome/chicken/01.SqueezeChicken.fasta.tsv" %>% 
  read_tsv() -> isescan

isescan %>% 
  head() %>% 
  DT::datatable()
isescan %>% 
  group_by(seqID) %>% 
  # add_count(family) %>% 
  # arrange(-n) %>% 
  top_n(1, wt = `E-value4copy`) %>% 
  top_n(1, wt = isBegin) -> isescan_top

Import viralverify results:

"/Users/fconstan/Projects/ETH/Alessia/mobilome/chicken/final.contigs_result_table.csv" %>% 
  read_csv() -> viralverify
## Warning: One or more parsing issues, see `problems()` for details
viralverify %>% 
  head() %>% 
  DT::datatable()

Import DAStool mapping - contig-MAG:

"/Users/fconstan/Projects/ETH/Alessia/anvio_chicken_SM_last/DAS-collection.txt.txt" %>% 
  read_tsv(col_names = c("split", "bin")) %>% 
  separate(split, into = c("cont", "cont_num", "split", "split_id"), sep = "_", remove = FALSE) %>% 
  mutate(cont_id = paste0(cont,"_", cont_num)) %>% 
  distinct(cont_id, .keep_all = TRUE) %>% 
  select(cont_id, bin) -> DAS

DAS %>% 
  head %>% 
  DT::datatable()
"/Users/fconstan/Projects/ETH/Alessia/anvio_chicken_SM_last/DAS-collection-HQ-MAGs.txt" %>% 
  read_tsv(col_names = c("split", "binHQ")) %>% 
  separate(split, into = c("cont", "cont_num", "split", "split_id"), sep = "_", remove = FALSE) %>% 
  mutate(cont_id = paste0(cont,"_", cont_num)) %>% 
  distinct(cont_id, .keep_all = TRUE) %>% 
  select(cont_id, binHQ) -> DASHQ

Import MAG taxonomy

"/Users/fconstan/Projects/ETH/Alessia/anvio_chicken_SM_last/MAGs_additional_data.tsv" %>% 
  read_tsv() -> MAG_tax

Combine data:

viralverify contig names

viralverify %>% 
  left_join(mapping,
            by = c("Contig name" = "megahit_contig")) %>% 
  select(-`Contig name`, -megahit_contig_full) %>% 
  select(SQM_contig, everything()) -> viralverify_fil

hannah plot:

"/Users/fconstan/Documents/GitHub/chicken/src/12_Figures/Resistome/resistome_phyloseq.rds" %>% 
  readRDS() -> physeq


sample_data(physeq)$Reactor_Treatment <- fct_relevel(sample_data(physeq)$Reactor_Treatment, "DONOR", "CR_UNTREATED", "TR1_CTX+HV292.1",
                                                     "TR2_CTX","TR3_HV292.1", "TR4_VAN", "TR5_VAN+CCUG59168", "TR6_CCUG59168") 

sample_data(physeq)$Treatment <- fct_relevel(sample_data(physeq)$Treatment, "DONOR", "UNTREATED",  "CTX+HV292.1", "CTX","HV292.1","VAN+CCUG59168", "VAN",  "CCUG59168") 

Create ARG type abundance plots:

physeq_drug <- speedyseq::tax_glom(physeq, taxrank = "Drug.Class")

#define colors
set.seed(123)
n <- phyloseq::ntaxa(physeq_drug)

col <- randomcoloR::distinctColorPalette(n)


physeq_drug %>%
  phyloseq::plot_bar(x = "Day_of_Treatment", fill = "Drug.Class") +
  scale_fill_manual(values=col) +
    # geom_bar(stat="identity",colour=NA,size=0,  position = "fill") +
  # geom_bar(stat="identity",colour=NA,size=0,  position = "fill") +
  facet_grid(~ Reactor_Treatment, scales = "free", space = "free") +
  ggtitle("ARG Type Abundance per Sample") + 
  theme(plot.title = element_text(hjust = 0.5)) +
  theme_light() +
  xlab("Day of Treatment") +
  ylab("TPM")

ARG Alpha Diversity

# extract ARG abundance info from phyloseq object
physeq %>%
  otu_table() %>%
  as.data.frame() %>%
  t() -> resistome_copy

# extract sample info
physeq %>%
  sample_data() %>%
  as.data.frame() -> treatment_info

# add sample info to resistome abundance data and get richeness
resistome_copy %>%
  specnumber() %>%
  cbind(., treatment_info) %>%
  filter(., Treatment != "DONOR")-> ARG_richeness

levels(ARG_richeness$Day_of_Treatment)[ARG_richeness$Day_of_Treatment] %>%
  as.numeric() -> ARG_richeness$Day_of_Treatment




colnames(ARG_richeness)[1] <- "Richeness"

# plot
ggplot(ARG_richeness, aes(x = Reactor_Treatment, y = Richeness)) + 
  geom_boxplot(aes(color=Reactor_Treatment),
                      outlier.shape = NA,
               outlier.colour = NA,
               outlier.size = 0) + 
  geom_jitter(aes(color=Reactor_Treatment), alpha = 0.4) +
  theme(legend.position="none") +
  xlab("Treatment") +
  ylab("ARG Richness") +
  theme_light() + scale_color_viridis_d(na.value = "black")

# plot panels for each treatment

ggplot(ARG_richeness, aes(x = Day_of_Treatment, y = Richeness, color = Reactor_Treatment)) +
  geom_line(linetype = "dashed",  size = 0.75) +
  geom_point() + 
  labs(title = "AMR Gene Richness",
       y = "Richness", x = "Days of Treatment") +
  theme_light() + scale_color_viridis_d(na.value = "black")

Beta Diversity:

sample_data(physeq)$Reactor_Treatment <- fct_relevel(sample_data(physeq)$Reactor_Treatment, "DONOR", "CR_UNTREATED", "TR1_CTX+HV292.1",
                                                     "TR2_CTX","TR3_HV292.1", "TR4_VAN", "TR5_VAN+CCUG59168", "TR6_CCUG59168") 

sample_data(physeq)$Treatment <- fct_relevel(sample_data(physeq)$Treatment, "DONOR", "UNTREATED",  "CTX+HV292.1", "CTX","HV292.1","VAN+CCUG59168", "VAN",  "CCUG59168") 


# physeq %>% 
#   rarefy_even_depth(sample.size = 43,
#                     rngseed = 123) -> phyloseq_rare

physeq %>%
  phyloseq_compute_bdiv(norm = "pc",
                        phylo = FALSE,
                        seed = 123) -> bdiv_list
## Loading required package: ape
## Loading required package: GUniFrac
## Registered S3 method overwritten by 'rmutil':
##   method         from
##   print.response httr
physeq  %>%
  subset_samples(Treatment != "DONOR") %>%
  phyloseq_plot_bdiv(dlist = bdiv_list,
                     seed = 123,
                     axis1 = 1,
                     axis2 = 2)  -> pcoa
## [1] "bray"
## [1] "sorensen"
## [1] "bjaccard"
## [1] "wjaccard"
# phyloseq_plot_bdiv(bdiv_list,
#                    # m = "CoDa",
#                    seed = 123) -> coda
# 
pcoa$wjaccard$layers = NULL

pcoa$wjaccard + geom_point(size=2,
                           aes(color = Reactor_Treatment, 
                               fill = Reactor_Treatment,
                               shape = NULL,
                               alpha = Day_of_Treatment)) + 
  theme_light() +
  geom_path(arrow = arrow(type = "open", angle = 30, length = unit(0.15, "inches")),
            size = 0.05, linetype = "dashed", inherit.aes = TRUE, aes(group=Reactor_Treatment, color = Reactor_Treatment), show.legend = FALSE) +
  #scale_alpha_continuous(range=c( 0.9, 0.3)) + 
  scale_color_viridis_d(na.value = "black") + 
  scale_fill_viridis_d(na.value = "black") + 
  # scale_shape_manual(values = c(8, 21, 22, 23, 24, 16, 15, 18, 17)) + 
  theme_classic() +
  labs(col=NULL, fill = NULL, shape = NULL) + guides(shape=FALSE) -> p1

p1

physeq %>% 
  tax_table() %>% 
  data.frame() %>% 
  rownames_to_column("id") %>% 
  mutate(gene = id) %>% 
  column_to_rownames("id") %>% 
  as.matrix %>% 
  tax_table() -> tax_table(physeq)



physeq  %>%
  subset_samples(Treatment != "DONOR") %>% 
  phyloseq_add_taxa_vector(dist = bdiv_list$wjaccard,
                           phyloseq = .,
                           figure_ord = p1,
                           tax_rank_plot = "Drug.Class", taxrank_glom = "Drug.Class",
                           top_r = 10, fact = 0.6) -> pco_env

p1

pco_env$plot

pco_env$signenvfit %>% 
  DT::datatable()
# plot.df.sep %>%
# select(sample, TPM, Best_Hit_ARO) %>% 
# pivot_wider(names_from = c("Best_Hit_ARO"), values_from = TPM) -> ready
#   t() %>%
#   pheatmap::pheatmap(clustering_distance_cols= "euclidean", 
#                      cluster_distance_rows = 'pearson', 
#                      annotation_col = full_data %>% column_to_rownames(sample_id_column) %>% dplyr::select(all_of(pca_group)), 
#                      cutree_cols = cutree_cols, 
#                      cutree_rows = cutree_rows,
#                      fontsize = 8) -> heatmap
sessionInfo()
## R version 4.1.2 (2021-11-01)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Mojave 10.14.6
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] GUniFrac_1.4         ape_5.5              vegan_2.5-7         
##  [4] lattice_0.20-45      permute_0.9-5        phyloseq_1.36.0     
##  [7] forcats_0.5.1        stringr_1.4.0        dplyr_1.0.7         
## [10] purrr_0.3.4          readr_2.1.0          tidyr_1.1.4         
## [13] tibble_3.1.6         ggplot2_3.3.5        tidyverse_1.3.1.9000
## 
## loaded via a namespace (and not attached):
##   [1] readxl_1.3.1           backports_1.4.0        plyr_1.8.6            
##   [4] igraph_1.2.6           splines_4.1.2          crosstalk_1.1.1       
##   [7] GenomeInfoDb_1.28.4    digest_0.6.29          foreach_1.5.1         
##  [10] htmltools_0.5.2        lmerTest_3.1-3         fansi_0.5.0           
##  [13] magrittr_2.0.1         cluster_2.1.2          tzdb_0.2.0            
##  [16] openxlsx_4.2.4         Biostrings_2.60.2      modelr_0.1.8          
##  [19] matrixStats_0.61.0     stabledist_0.7-1       vroom_1.5.6           
##  [22] colorspace_2.0-2       rvest_1.0.2            ggrepel_0.9.1         
##  [25] haven_2.4.3            xfun_0.28              crayon_1.4.2          
##  [28] RCurl_1.98-1.5         jsonlite_1.7.2         lme4_1.1-27.1         
##  [31] survival_3.2-13        iterators_1.0.13       glue_1.5.1            
##  [34] gtable_0.3.0           zlibbioc_1.38.0        XVector_0.32.0        
##  [37] V8_3.6.0               car_3.0-11             Rhdf5lib_1.14.2       
##  [40] BiocGenerics_0.38.0    abind_1.4-5            scales_1.1.1          
##  [43] DBI_1.1.1              randomcoloR_1.1.0.1    rstatix_0.7.0         
##  [46] Rcpp_1.0.7             viridisLite_0.4.0      clue_0.3-60           
##  [49] foreign_0.8-81         bit_4.0.4              stats4_4.1.2          
##  [52] DT_0.19                timeSeries_3062.100    htmlwidgets_1.5.4     
##  [55] httr_1.4.2             ellipsis_0.3.2         spatial_7.3-14        
##  [58] pkgconfig_2.0.3        farver_2.1.0           sass_0.4.0            
##  [61] dbplyr_2.1.1           utf8_1.2.2             tidyselect_1.1.1      
##  [64] labeling_0.4.2         rlang_0.4.12           reshape2_1.4.4        
##  [67] munsell_0.5.0          cellranger_1.1.0       tools_4.1.2           
##  [70] cli_3.1.0              generics_0.1.1         ade4_1.7-18           
##  [73] broom_0.7.10           evaluate_0.14          biomformat_1.20.0     
##  [76] fastmap_1.1.0          yaml_2.2.1             knitr_1.36            
##  [79] bit64_4.0.5            fs_1.5.0               zip_2.2.0             
##  [82] nlme_3.1-153           xml2_1.3.2             compiler_4.1.2        
##  [85] rstudioapi_0.13        curl_4.3.2             ggsignif_0.6.3        
##  [88] reprex_2.0.1           statmod_1.4.36         statip_0.2.3          
##  [91] bslib_0.3.1            stringi_1.7.6          highr_0.9             
##  [94] modeest_2.4.0          fBasics_3042.89.1      Matrix_1.3-4          
##  [97] nloptr_1.2.2.2         multtest_2.48.0        vctrs_0.3.8           
## [100] pillar_1.6.4           lifecycle_1.0.1        rhdf5filters_1.4.0    
## [103] jquerylib_0.1.4        data.table_1.14.2      bitops_1.0-7          
## [106] stable_1.1.4           R6_2.5.1               rio_0.5.27            
## [109] IRanges_2.26.0         codetools_0.2-18       boot_1.3-28           
## [112] MASS_7.3-54            assertthat_0.2.1       rhdf5_2.36.0          
## [115] withr_2.4.3            S4Vectors_0.30.2       GenomeInfoDbData_1.2.6
## [118] mgcv_1.8-38            parallel_4.1.2         hms_1.1.1             
## [121] rpart_4.1-15           timeDate_3043.102      grid_4.1.2            
## [124] speedyseq_0.5.3.9018   minqa_1.2.4            rmarkdown_2.11        
## [127] carData_3.0-4          rmutil_1.1.5           Rtsne_0.15            
## [130] ggpubr_0.4.0           numDeriv_2016.8-1.1    Biobase_2.52.0        
## [133] lubridate_1.8.0